# rm(list = ls())
library(LaplacesDemon)
library(rstan)
library(gtools)
# example for complete case C -> D
generateComp <- function(N, D.tran, q1, q2)
{
  # set.seed(1987)
  A.traj = c() # time trajectory for discrete-time variable
  B.traj = c() # time trajectory for continuous-time variable
  # sampled states
  D.states = c()
  C.states = c()
  # CPTs for D and C, no arcs in inital BN
  D.theta = c(0.2, 0.8)
  C.theta = c(0.4, 0.6)
  
  inten = as.data.frame(matrix(c(-q1,q1,q2,-q2), ncol = 2, byrow = TRUE))
  
  D.currstate = rcat(1, D.theta)
  C.currstate = rcat(1, C.theta)
  D.states = c(D.states, D.currstate)
  C.states = c(C.states, C.currstate)
  currTime = 0
  time.traj = c()
  B.traj = c()
  time.int = c()
  # generate complete trajectory
  while(currTime <=N)
  {
    time.traj = c(time.traj, currTime)
    B.traj = c(B.traj, currTime)
    # choose rate 
    q = -inten[C.currstate, C.currstate]
    # generate next time
    time = rexp(1, rate = q)
    time.int = c(time.int, time)
    # reject after next discrete-time slice
    C.currstate = setdiff(c(1,2), tail(C.states, 1))
    C.states = c(C.states, C.currstate)
    currTime = currTime + time
  }
  
  # generate D given C
  C.ind = c()
  for(i in 2:N)
  {
    m = max(which(B.traj <= i))
    C.ind = c(C.ind, m)
    trandis = D.tran[C.states[m],,D.currstate]
    D.currstate = rcat(1, trandis)
    D.states = c(D.states, D.currstate)
  }
  
  incomp = list(time.traj = time.traj, C.states = C.states, D.tran.states = D.states)
  comp = list(D.C.states = C.states[C.ind], D.states = D.states, C.states = C.states, time.int = time.int)
  return(list(incomp = incomp, comp = comp))
}

computeSuff <- function(data)
{
  comp = data$comp
  D.C.states = comp$D.C.states
  D.states = comp$D.states
  C.states = comp$C.states
  time.int = comp$time.int
  # compute sufficient statistics for D
  DCD.suff = array(data = rep(0,8), dim = c(2,2,2))
  DC.suff = array(data = rep(0,8), dim = c(2,2,2))
  for(i in 1:(length(D.states)-1))
  {
    DCD.suff[D.C.states[i], D.states[i+1], D.states[i]] =   DCD.suff[D.C.states[i], D.states[i+1], D.states[i]] + 1
  }
  for(i in 1:(length(D.states) - 1))
  {
    DC.suff[D.C.states[i], 1, D.states[i]] =   DC.suff[D.C.states[i], 1, D.states[i]] + 1
    DC.suff[D.C.states[i], 2, D.states[i]] =   DC.suff[D.C.states[i], 2, D.states[i]] + 1
  }

  # compute sufficient statistics for C
  C.M.suff  = c(rep(0,2))
  C.T.suff  = c(rep(0,2))
  for(i in 1:(length(C.states) - 1))
  {
    C.M.suff[C.states[i]] =   C.M.suff[C.states[i]] + 1
    C.T.suff[C.states[i]] =   C.T.suff[C.states[i]] + time.int[i]
  }
  D.suff = list(DCD.suff = DCD.suff, DC.suff = DC.suff)
  C.suff = list(C.M.suff = C.M.suff, C.T.suff = C.T.suff)
  return(list(D.suff = D.suff, C.suff = C.suff))
}
# parameters for true network
# CPTs for D with multinomial parameters from Dirichlet distributions wit alpha = c(1,1)
q1 = rgamma(1, shape = 2, rate = 2)
q2 = rgamma(1, shape = 2, rate = 2)
trueq = c(q1, q2)
D.tran = array(data = rep(0,8), dim = c(2,2,2))
alpha = c(1,1) # hyperparameters
for(i in 1:2)
{
  for(j in 1:2)
    D.tran[i,, j] = rdirichlet(1, alpha = alpha)
}
# fix data for test
testDat = generateComp(10000 , D.tran, q1, q2)

true.ll = c()
est.ll = c()
N.slice = c(10, 20, 40, 80, 160, 320, 640, 1024, 2048, 4096)
est.q.list = list()
est.D.tran.list = list()
for(k in 1:length(N.slice))
{
  N = N.slice[k]
  # intensities for C from Gamma distribution with hyperparameters alpha(shape) = 2, beta(rate) = 2
  # generate data to learn with given length of sequence
  learnDat = generateComp(N , D.tran, q1, q2)
  suff = computeSuff(learnDat)
  DCD.suff = suff$D.suff$DCD.suff
  DCD.suff
  DC.suff = suff$D.suff$DC.suff
  C.M.suff = suff$C.suff$C.M.suff
  C.T.suff = suff$C.suff$C.T.suff
  est.D.tran = (DCD.suff+1)/(DC.suff + 2)
  estq = (C.M.suff + 1)/(C.T.suff + 2)
  est.q.list[[length(est.q.list) + 1]] = estq
  est.D.tran.list[[length(est.D.tran.list) + 1]] = est.D.tran
  # abs(est.D.tran - D.tran)
  # generate data for compute distance
  # testDat = generateComp(N , D.tran, q1, q2)
  test.suff = computeSuff(testDat)
  DCD.suff = test.suff$D.suff$DCD.suff
  DCD.suff
  DC.suff = test.suff$D.suff$DC.suff
  C.M.suff = test.suff$C.suff$C.M.suff
  C.T.suff = test.suff$C.suff$C.T.suff
  
  true.ll = c(true.ll, sum(DCD.suff * log(D.tran))+ sum(C.M.suff * log(trueq)) - sum(trueq * C.T.suff))
  est.ll = c(est.ll, sum(DCD.suff * log(est.D.tran))+ sum(C.M.suff * log(estq)) - sum(estq * C.T.suff))
}

abs(true.ll - est.ll)
true.ll

dataCDcompLL = data.frame(N.slice = log10(N.slice), est.ll = est.ll, true.ll = true.ll,  diff = abs(true.ll - est.ll))
dataCDcompLL
data.est.q = matrix(c(unlist(est.q.list)), byrow = T, ncol = 2)
colnames(data.est.q) = paste("q",c(1:2), sep = "")
rownames(data.est.q) = N.slice
trueq = matrix(trueq, nrow = 1)
colnames(trueq) = paste("q",c(1:4), sep = "")
write.csv(dataCDcompLL, "~/Papers/LearnHTBN/CD(complete)LL.diff.csv", row.names = FALSE, quote = FALSE)
write.list(est.D.tran.list, "~/Papers/LearnHTBN/CD(complete).est.CPT.csv", quote = FALSE, row.names = FALSE, t.name = N.slice, eol = "\n")
write.csv(data.est.q, "~/Papers/LearnHTBN/CD(complete).est.q.csv", quote = FALSE)
write.csv(trueq, "~/Papers/LearnHTBN/CD(complete).true.q.csv", quote = FALSE)
write.csv(D.tran, "~/Papers/LearnHTBN/CD(complete).true.CPT.csv", quote = FALSE)


